STATS 506

Problem Set 5

Author

Sky Shi

Modified

November 21, 2024

The link to the problem set 5 GitHub repository is at: https://github.com/skyshi1/STAT506/tree/main/problemSet5, as a sub-folder of the STATS 506 repository.

Problem 1 - OOP Programming

a. Define the rational class

Use Rcpp to define the C++ helper functions:

# Load necessary libraries
library(methods)  # For S4 class and methods
library(Rcpp)     # For integrating C++ functions into R

# Define a C++ function to compute the greatest common divisor (GCD).
# std::gcd is declared in <numeric> and needs C++17, so both are requested
# explicitly rather than relying on transitively included headers or on the
# compiler's default language standard.
cppFunction(
  code = '
int C_gcd(int x, int y) {
  // R passes NA_integer_ as INT_MIN; std::gcd is undefined for that value
  if (x == NA_INTEGER || y == NA_INTEGER) {
    Rcpp::stop("C_gcd() does not accept NA values.");
  }
  return std::gcd(x, y);  // Always non-negative
}
',
  includes = "#include <numeric>",
  plugins = "cpp17"
)

# Define a C++ function to compute the least common multiple (LCM).
# The computation is done in 64 bits because std::lcm on plain int has
# undefined behaviour when the result does not fit; an out-of-range result is
# reported as an R error instead. <numeric> and C++17 are requested explicitly
# because std::lcm depends on both.
cppFunction(
  code = '
int C_lcm(int x, int y) {
  // R passes NA_integer_ as INT_MIN; reject it before doing arithmetic
  if (x == NA_INTEGER || y == NA_INTEGER) {
    Rcpp::stop("C_lcm() does not accept NA values.");
  }
  const long long wide =
    std::lcm(static_cast<long long>(x), static_cast<long long>(y));
  if (wide > std::numeric_limits<int>::max()) {
    Rcpp::stop("Least common multiple exceeds the integer range.");
  }
  return static_cast<int>(wide);  // Always non-negative
}
',
  includes = c("#include <numeric>", "#include <limits>"),
  plugins = "cpp17"
)

Get the class:

# Define the Rational S4 class.
#
# Slots:
#   numerator   - integer numerator of the fraction
#   denominator - integer denominator of the fraction (must be non-zero)
#
# The validity function follows the S4 convention of returning TRUE for a
# valid object and a character vector of problems otherwise, so validObject()
# produces the error. The slot declarations already guarantee that both slots
# are integer vectors, so only length, NA-ness and a zero denominator need to
# be checked here.
setClass(
  "Rational",
  slots = c(
    numerator = "integer",
    denominator = "integer"
  ),
  validity = function(object) {
    if (length(object@numerator) != 1L || length(object@denominator) != 1L) {
      return("Both numerator and denominator must be single integers.")
    }
    if (is.na(object@numerator) || is.na(object@denominator)) {
      return("Numerator and denominator cannot be NA.")
    }
    if (object@denominator == 0L) {
      return("Denominator cannot be zero.")
    }
    TRUE
  }
)

# Constructor for the Rational class.
#
# Arguments:
#   numerator, denominator - single whole-number values (integer or double)
#
# Returns a Rational object in lowest terms whose denominator is positive
# (the sign of the fraction is carried by the numerator).
#
# Errors when either argument is not a single finite number, is not a whole
# number, does not fit in R's integer range, or when the denominator is zero.
Rational <- function(numerator, denominator) {
  is_single_number <- function(x) {
    is.numeric(x) && length(x) == 1L && is.finite(x)
  }
  # Reject characters, NA, vectors, ... before any arithmetic is attempted
  if (!is_single_number(numerator) || !is_single_number(denominator)) {
    stop("Both numerator and denominator must be integers.")
  }
  if (denominator == 0) stop("Denominator cannot be zero.")

  # Check for whole numbers on the raw input, before as.integer() truncates
  if (numerator %% 1 != 0 || denominator %% 1 != 0) {
    stop("Both numerator and denominator must be integers.")
  }
  # as.integer() would silently turn out-of-range values into NA
  if (abs(numerator) > .Machine$integer.max ||
        abs(denominator) > .Machine$integer.max) {
    stop("Numerator and denominator must fit in the integer range.")
  }

  numerator <- as.integer(numerator)
  denominator <- as.integer(denominator)

  # Reduce by the GCD; a negative divisor flips both signs so that the
  # denominator ends up positive (3 / -4 is stored as -3 / 4)
  divisor <- C_gcd(numerator, denominator)
  if (denominator < 0L) divisor <- -divisor

  new(
    "Rational",
    numerator = numerator %/% divisor,
    denominator = denominator %/% divisor
  )
}

# Display method: writes the fraction as "numerator / denominator" followed
# by a newline.
setMethod(
  "show",
  "Rational",
  function(object) {
    # Join the pieces with single spaces, exactly as cat() would separate them
    pieces <- c(object@numerator, "/", object@denominator, "\n")
    cat(paste(pieces, collapse = " "), sep = "")
  }
)

# Generic for reducing an object to its simplest form; takes the object only
setGeneric(
  name = "simplify",
  def = function(object) standardGeneric("simplify")
)
[1] "simplify"
# simplify() for Rational: returns a new Rational with numerator and
# denominator divided by their greatest common divisor.
setMethod(
  "simplify",
  "Rational",
  function(object) {
    top <- object@numerator
    bottom <- object@denominator
    common <- C_gcd(top, bottom)
    # Hand the reduced parts to the constructor to build the result
    Rational(top / common, bottom / common)
  }
)

# Generic for the decimal value of an object; `digits` is optional and
# defaults to NULL
setGeneric(
  name = "quotient",
  def = function(object, digits = NULL) standardGeneric("quotient")
)
[1] "quotient"
# quotient() for Rational: the decimal value numerator / denominator.
#
# Arguments:
#   object - a Rational
#   digits - NULL (default) for the unrounded value, or a single whole number
#            giving the number of decimal places passed to round()
#
# Errors when `digits` is not a single numeric value, or is numeric but not a
# whole number (e.g. 3.14), instead of silently letting round() accept it.
setMethod(
  "quotient",
  "Rational",
  function(object, digits = NULL) {
    value <- object@numerator / object@denominator
    if (is.null(digits)) {
      return(value)
    }
    if (!is.numeric(digits) || length(digits) != 1L) {
      stop("digits must be a single numeric value.")
    }
    # is.finite() also rejects NA, so the %% comparison below is never NA
    if (!is.finite(digits) || digits %% 1 != 0) {
      stop("digits must be a whole number.")
    }
    round(value, digits)
  }
)

# Rational + Rational: bring both fractions onto the least common multiple of
# their denominators, add the scaled numerators, and let the constructor
# build the result.
setMethod(
  "+",
  c("Rational", "Rational"),
  function(e1, e2) {
    common_den <- C_lcm(e1@denominator, e2@denominator)
    # Factor by which each numerator must grow to sit over the common denominator
    scale_left <- common_den / e1@denominator
    scale_right <- common_den / e2@denominator
    Rational(
      e1@numerator * scale_left + e2@numerator * scale_right,
      common_den
    )
  }
)

# Rational - Rational: bring both fractions onto the least common multiple of
# their denominators, subtract the scaled numerators, and let the constructor
# build the result.
setMethod(
  "-",
  c("Rational", "Rational"),
  function(e1, e2) {
    common_den <- C_lcm(e1@denominator, e2@denominator)
    # Factor by which each numerator must grow to sit over the common denominator
    scale_left <- common_den / e1@denominator
    scale_right <- common_den / e2@denominator
    Rational(
      e1@numerator * scale_left - e2@numerator * scale_right,
      common_den
    )
  }
)

# Rational * Rational.
#
# Multiplying the integer slots directly overflows to NA (with a warning) once
# a product exceeds R's integer range, even when the reduced result is small.
# Common factors are therefore cancelled crosswise first, and the remaining
# factors are multiplied as doubles before being handed to the constructor.
setMethod(
  "*",
  c("Rational", "Rational"),
  function(e1, e2) {
    # Denominators are non-zero, so both divisors are at least 1
    g_left <- C_gcd(e1@numerator, e2@denominator)
    g_right <- C_gcd(e2@numerator, e1@denominator)
    num <- (e1@numerator / g_left) * (e2@numerator / g_right)
    den <- (e1@denominator / g_right) * (e2@denominator / g_left)
    Rational(num, den)
  }
)

# Rational / Rational, computed as multiplication by the reciprocal.
#
# Errors with "Division by zero is not allowed." when the divisor is zero.
# As with multiplication, common factors are cancelled before multiplying and
# the products are formed as doubles, so intermediate values do not overflow
# R's integer range when the reduced result fits.
setMethod(
  "/",
  c("Rational", "Rational"),
  function(e1, e2) {
    if (e2@numerator == 0) stop("Division by zero is not allowed.")
    # e2@numerator and both denominators are non-zero, so both divisors are >= 1
    g_num <- C_gcd(e1@numerator, e2@numerator)
    g_den <- C_gcd(e1@denominator, e2@denominator)
    num <- (e1@numerator / g_num) * (e2@denominator / g_den)
    den <- (e1@denominator / g_den) * (e2@numerator / g_num)
    Rational(num, den)
  }
)

b. Use your rational class to create three objects

We create these objects first:

# Build the three example fractions: 24/6, 7/230 and 0/4
r1 <- Rational(numerator = 24, denominator = 6)
r2 <- Rational(numerator = 7, denominator = 230)
r3 <- Rational(numerator = 0, denominator = 4)

Then do the operations:

# 1. Display r1 and r3
cat("r1:\n")
r1:
print(r1)  # Should display the simplified version of 24/6
4 / 1 
cat("\nr3:\n")

r3:
print(r3)  # Should display 0/1
0 / 1 
# 2. Arithmetic operations
cat("\nr1 + r2:\n")

r1 + r2:
print(r1 + r2)  # Add r1 and r2
927 / 230 
cat("\nr1 - r2:\n")

r1 - r2:
print(r1 - r2)  # Subtract r2 from r1
913 / 230 
cat("\nr1 * r2:\n")

r1 * r2:
print(r1 * r2)  # Multiply r1 and r2
14 / 115 
cat("\nr1 / r2:\n")

r1 / r2:
print(r1 / r2)  # Divide r1 by r2
920 / 7 
cat("\nr1 + r3:\n")

r1 + r3:
print(r1 + r3)  # Add r1 and r3
4 / 1 
cat("\nr1 * r3:\n")

r1 * r3:
print(r1 * r3)  # Multiply r1 and r3 (should result in 0/1)
0 / 1 
cat("\nr2 / r3:\n")

r2 / r3:
tryCatch(
  print(r2 / r3),  # This should throw an error because division by 0 is not allowed
  error = function(e) cat("Error:", e$message, "\n")
)
Error: Division by zero is not allowed. 
# 3. Quotient method
cat("\nQuotient of r1:\n")

Quotient of r1:
print(quotient(r1))
[1] 4
cat("\nQuotient of r2:\n")

Quotient of r2:
print(quotient(r2))
[1] 0.03043478
cat("\nQuotient of r2 (3 digits):\n")

Quotient of r2 (3 digits):
print(quotient(r2, digits = 3))
[1] 0.03
cat("\nQuotient of r2 (invalid digits = 3.14):\n")

Quotient of r2 (invalid digits = 3.14):
tryCatch(
  print(quotient(r2, digits = 3.14)),  # Should throw an error due to invalid digits argument
  error = function(e) cat("Error:", e$message, "\n")
)
[1] 0.03
cat("\nQuotient of r2 (invalid digits = 'avocado'):\n")

Quotient of r2 (invalid digits = 'avocado'):
tryCatch(
  print(quotient(r2, digits = "avocado")),  # Should throw an error due to invalid digits argument
  error = function(e) cat("Error:", e$message, "\n")
)
Error: digits must be a single numeric value. 
q2 <- quotient(r2, digits = 3)
cat("\nStored quotient q2:\n")

Stored quotient q2:
print(q2)
[1] 0.03
cat("\nQuotient of r3:\n")

Quotient of r3:
print(quotient(r3))
[1] 0
# 4. Simplify method
cat("\nSimplified r1:\n")

Simplified r1:
print(simplify(r1))
4 / 1 
cat("\nSimplified r2:\n")

Simplified r2:
print(simplify(r2))
7 / 230 
cat("\nSimplified r3:\n")

Simplified r3:
print(simplify(r3))
0 / 1 

c. Check validator

# Test Cases for Validation

# Case 1: Valid rational number
cat("Test Case 1: Valid Rational Number (24/6)\n")
Test Case 1: Valid Rational Number (24/6)
try(r_valid <- Rational(24, 6))  # Should work
print(r_valid)
4 / 1 
# Case 2: Zero denominator
cat("\nTest Case 2: Zero Denominator\n")

Test Case 2: Zero Denominator
try(r_invalid_zero_den <- Rational(1, 0))  # Should raise an error
Error in Rational(1, 0) : Denominator cannot be zero.
# Case 3: Non-integer numerator (label matches the 2.5/5 input tested below)
cat("\nTest Case 3: Non-integer Numerator (2.5/5)\n")

Test Case 3: Non-integer Numerator (3.5/2)
try(r_invalid_non_integer_num <- Rational(2.5, 5))  # Should raise an error
Error in Rational(2.5, 5) : 
  Both numerator and denominator must be integers.
# Case 4: Non-integer denominator (label matches the 5/2.5 input tested below)
cat("\nTest Case 4: Non-integer Denominator (5/2.5)\n")

Test Case 4: Non-integer Denominator (3/2.5)
try(r_invalid_non_integer_den <- Rational(5, 2.5))  # Should raise an error
Error in Rational(5, 2.5) : 
  Both numerator and denominator must be integers.
# Case 5: Non-integer numerator and denominator (the call below passes
# 3.5 and 1.5, not non-numeric values)
cat("\nTest Case 5: Non-integer Numerator and Denominator (3.5/1.5)\n")

Test Case 5: Non-numeric Inputs ('a'/'b')
try(r_invalid_both_non_integer <- Rational(3.5, 1.5))  # Should raise an error
Error in Rational(3.5, 1.5) : 
  Both numerator and denominator must be integers.
# Case 6: Negative denominator (validation should adjust this automatically)
cat("\nTest Case 6: Negative Denominator (-3/4)\n")

Test Case 6: Negative Denominator (-3/4)
try(r_negative_denominator <- Rational(3, -4))  # Should work with adjusted signs
print(r_negative_denominator)
3 / -4 

Problem 2 - plotly

a. Does the distribution of genre of sales across years appear to change?

Plot from last time for comparison:

suppressPackageStartupMessages({
  library(tidyverse)
  library(plotly)
  library(ggplot2)
})
# Read the art sales data from disk
art_sales <- read.csv("../data/df_for_ml_improved_new_market.csv")

# Stack the Genre___* columns into one row per (sale, genre) pair and keep
# only the pairs flagged with 1. `names_prefix` strips the "Genre___" prefix
# from the genre labels during the pivot.
art_sales_long <- art_sales %>%
  pivot_longer(
    cols = starts_with("Genre___"),
    names_to = "genre",
    names_prefix = "Genre___",
    values_to = "present",
    values_drop_na = TRUE
  ) %>%
  filter(present == 1)

# Number of sales per (year, genre) combination, stored in column `count`
genre_distribution <- count(art_sales_long, year, genre, name = "count")

# Stacked bar chart of genre counts per year; position = "fill" rescales each
# year's bar to the same height so the segments read as proportions
ggplot(
  data = genre_distribution,
  mapping = aes(x = factor(year), y = count, fill = genre)
) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = "Set2") +
  ggtitle("Distribution of Genre Sales Across Years") +
  xlab("Year") +
  ylab("Proportion of Sales") +
  labs(fill = "Genre") +
  theme_minimal(base_size = 10) +
  theme(
    # Centered bold title, slightly larger axis titles
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 12)
  )

New plot by plotly:

# Long-format genre data: one row per (sale, genre) pair flagged with 1,
# with the "Genre___" prefix removed from the genre labels
genre_long_raw <- pivot_longer(
  art_sales,
  cols = starts_with("Genre___"),
  names_to = "genre",
  values_to = "present",
  values_drop_na = TRUE
)
art_sales_long <- genre_long_raw %>%
  filter(present == 1) %>%
  mutate(genre = str_remove(genre, "Genre___"))
rm(genre_long_raw)  # temporary intermediate, not needed afterwards

# Sales count per (year, genre), plus each genre's share of that year's sales
genre_distribution <- art_sales_long %>%
  count(year, genre, name = "count") %>%
  group_by(year) %>%
  mutate(proportion = count / sum(count)) %>%
  ungroup()

# Interactive stacked bar chart of genre proportions by year.
# Step 1: the bar traces, one colour per genre, with a custom hover label.
genre_distribution_plot <- plot_ly(
  data = genre_distribution,
  x = ~year,
  y = ~proportion,
  color = ~genre,
  type = "bar",
  text = ~paste(
    "Year:", year,
    "<br>Genre:", genre,
    "<br>Proportion:", scales::percent(proportion, accuracy = 0.1)
  ),
  hoverinfo = "text"
)

# Step 2: titles, percentage-formatted y-axis, stacking and legend title.
genre_distribution_plot <- plotly::layout(
  genre_distribution_plot,
  title = "Proportional Distribution of Genre Sales Across Years (Interactive)",
  xaxis = list(title = "Year"),
  yaxis = list(title = "Proportion", tickformat = "%"),
  barmode = "stack",
  legend = list(title = list(text = "Genre"))
)

genre_distribution_plot

From this plot, we see that the distribution of genres has changed over time since 1997. For example, (almost) no prints were sold before 2000, but prints make up a larger share of sales around 2008. We can also see a decline in the proportion of sales in the painting and others genres. The proportions of sculpture and photography are stable over time after 2000.

b. Generate an interactive plot with plotly that can address 2 questions from last time

# Average sale price (USD) per (year, genre): reshape the Genre___* columns
# to long form, keep the pairs flagged with 1, strip the column prefix, then
# average price_usd within each group (missing prices ignored)
price_by_genre <- art_sales %>%
  pivot_longer(
    cols = starts_with("Genre___"),
    names_to = "genre",
    values_to = "present",
    values_drop_na = TRUE
  ) %>%
  filter(present == 1) %>%
  mutate(genre = sub("Genre___", "", genre, fixed = TRUE)) %>%
  group_by(year, genre) %>%
  summarise(avg_price = mean(price_usd, na.rm = TRUE)) %>%
  ungroup()

# Yearly average price over all sales, labelled with the pseudo-genre
# "Overall" so it can be plotted alongside the genre-specific series
overall_avg_price <- art_sales %>%
  group_by(year) %>%
  summarise(
    avg_price = mean(price_usd, na.rm = TRUE),
    genre = "Overall",
    .groups = "drop"
  )

# Stack the genre-specific rows and the overall rows into one table
price_data_combined <- bind_rows(price_by_genre, overall_avg_price)

# Interactive line chart of average price by year, one series per genre
# (including the "Overall" series).
# Step 1: line+marker traces with a custom hover label.
price_plot <- plot_ly(
  data = price_data_combined,
  x = ~year,
  y = ~avg_price,
  color = ~genre,
  type = "scatter",
  mode = "lines+markers",
  text = ~paste(
    "Year:", year,
    "<br>Genre:", genre,
    "<br>Average Price (USD):", scales::dollar(avg_price)
  ),
  hoverinfo = "text"
)

# Step 2: titles, dollar-formatted y-axis and legend title.
price_plot <- plotly::layout(
  price_plot,
  title = "Change in Sales Price Over Time (Overall and by Genre)",
  xaxis = list(title = "Year"),
  yaxis = list(title = "Average Price (USD)", tickformat = "$"),
  legend = list(title = list(text = "Genre"))
)

price_plot

From this plot, we see that, starting from 1997, the overall trend of the sale price is increasing over time. In addition, we can see a peak in sale price in 2008 followed by a drop, possibly due to the economic recession. The plot also clearly shows that genre affects how the sale price changes over time. The others and painting genres have the lowest price increase over time. The print price changes a lot over time, with an overall increasing trend but very different values from year to year. The sculpture genre, on the other hand, increases steadily over time, and photography has the fastest and largest change in price. All of them have their highest prices in 2008.

Problem 3 - data.table

a. Tables for departure delay and arrival delay

# Load necessary libraries
suppressPackageStartupMessages({
  library(data.table)
  library(nycflights13)
})

# data.table versions of the three nycflights13 tables used below
flights_dt <- as.data.table(x = nycflights13::flights)
airports_dt <- as.data.table(x = nycflights13::airports)
planes_dt <- as.data.table(x = nycflights13::planes)

### Part 1a: Departure Delay Summary
# Mean/median departure delay and flight count per origin airport, restricted
# to origins with at least 10 flights that have a recorded departure delay,
# labelled with the airport name and sorted by descending mean delay.
# local() keeps the intermediate tables out of the global environment.
departure_delay_summary <- local({
  per_origin <- flights_dt[
    !is.na(dep_delay),
    .(
      mean_dep_delay = mean(dep_delay, na.rm = TRUE),
      median_dep_delay = median(dep_delay, na.rm = TRUE),
      flight_count = .N
    ),
    by = origin
  ]
  frequent <- per_origin[flight_count >= 10]

  # Look up each airport in the summary; nomatch = NULL drops airports
  # without a matching origin row
  named <- frequent[airports_dt, on = .(origin = faa), nomatch = NULL]

  labelled <- named[
    ,
    .(
      `Airport Name` = name,
      `Mean Departure Delay (min)` = mean_dep_delay,
      `Median Departure Delay (min)` = median_dep_delay,
      `Number of Flights` = flight_count
    )
  ]
  labelled[order(-`Mean Departure Delay (min)`)]
})

### Part 1b: Arrival Delay Summary
# Mean/median arrival delay and flight count per destination airport,
# restricted to destinations with at least 10 flights that have a recorded
# arrival delay, labelled with the airport name and sorted by descending mean
# delay. local() keeps the intermediate tables out of the global environment.
arrival_delay_summary <- local({
  per_dest <- flights_dt[
    !is.na(arr_delay),
    .(
      mean_arr_delay = mean(arr_delay, na.rm = TRUE),
      median_arr_delay = median(arr_delay, na.rm = TRUE),
      flight_count = .N
    ),
    by = dest
  ]
  frequent <- per_dest[flight_count >= 10]

  # Look up each airport in the summary; nomatch = NULL drops airports
  # without a matching destination row
  named <- frequent[airports_dt, on = .(dest = faa), nomatch = NULL]

  labelled <- named[
    ,
    .(
      `Airport Name` = name,
      `Mean Arrival Delay (min)` = mean_arr_delay,
      `Median Arrival Delay (min)` = median_arr_delay,
      `Number of Flights` = flight_count
    )
  ]
  labelled[order(-`Mean Arrival Delay (min)`)]
})

Print the tables out nicely:

# Load knitr for better table formatting
library(knitr)
# Render the arrival delay summary as a styled HTML table with centred
# columns and shortened column headers
kableExtra::kable_styling(
  kable(
    arrival_delay_summary,
    format = "html",
    align = "c",
    col.names = c(
      "Airport Name",
      "Mean Delay (min)",
      "Median Delay (min)",
      "Flights"
    ),
    caption = "Arrival Delay Summary"
  ),
  bootstrap_options = c("striped", "hover", "condensed")
)
Arrival Delay Summary
Airport Name Mean Delay (min) Median Delay (min) Flights
Columbia Metropolitan 41.7641509 28.0 106
Tulsa Intl 33.6598639 14.0 294
Will Rogers World 30.6190476 16.0 315
Jackson Hole Airport 28.0952381 15.0 21
Mc Ghee Tyson 24.0692042 2.0 578
Dane Co Rgnl Truax Fld 20.1960432 1.0 556
Richmond Intl 20.1112532 1.0 2346
Akron Canton Regional Airport 19.6983373 3.0 842
Des Moines Intl 19.0057361 0.0 523
Gerald R Ford Intl 18.1895604 1.0 728
Birmingham Intl 16.8773234 -2.0 269
Theodore Francis Green State 16.2346369 1.0 358
Greenville-Spartanburg International 15.9354430 -0.5 790
Cincinnati Northern Kentucky Intl 15.3645638 -3.0 3725
Savannah Hilton Head Intl 15.1295060 -1.0 749
Manchester Regional Airport 14.7875536 -3.0 932
Eppley Afld 14.6988984 -2.0 817
Yeager 14.6716418 -1.5 134
Kansas City Intl 14.5140584 0.0 1885
Albany Intl 14.3971292 -4.0 418
General Mitchell Intl 14.1672204 0.0 2709
Piedmont Triad 14.1126005 -2.0 1492
Washington Dulles Intl 13.8642021 -3.0 5383
Cherry Capital Airport 12.9684211 -10.0 95
James M Cox Dayton Intl 12.6804861 -3.0 1399
Louisville International Airport 12.6693841 -2.0 1104
Chicago Midway Intl 12.3642236 -1.0 4025
Sacramento Intl 12.1099291 4.0 282
Jacksonville Intl 11.8448342 -2.0 2623
Nashville Intl 11.8124589 -2.0 6084
Portland Intl Jetport 11.6604021 -4.0 2288
Greater Rochester Intl 11.5606446 -5.0 2358
Hartsfield Jackson Atlanta Intl 11.3001128 -1.0 16837
Lambert St Louis Intl 11.0784645 -3.0 4142
Norfolk Intl 10.9490934 -4.0 1434
Baltimore Washington Intl 10.7267338 -5.0 1687
Memphis Intl 10.6453144 -2.5 1686
Port Columbus Intl 10.6013229 -3.0 3326
Charleston Afb Intl 10.5929685 -4.0 2759
Philadelphia Intl 10.1271901 -3.0 1541
Raleigh Durham Intl 10.0523810 -3.0 7770
Indianapolis Intl 9.9404341 -3.0 1981
Charlottesville-Albemarle 9.5000000 -5.0 46
Cleveland Hopkins Intl 9.1816113 -5.0 4394
Ronald Reagan Washington Natl 9.0669520 -2.0 9111
Burlington Intl 8.9509960 -4.0 2510
Buffalo Niagara Intl 8.9459519 -5.0 4570
Syracuse Hancock Intl 8.9039250 -5.0 1707
Denver Intl 8.6065002 -2.0 7169
Palm Beach Intl 8.5629721 -3.0 6487
Bob Hope 8.1756757 -3.0 370
Fort Lauderdale Hollywood Intl 8.0821215 -3.0 11897
Bangor Intl 8.0279330 -9.0 358
Asheville Regional Airport 8.0038314 -1.0 261
Pittsburgh Intl 7.6809905 -5.0 2746
Gallatin Field 7.6000000 -2.0 35
NW Arkansas Regional 7.4657258 -2.0 992
Tampa Intl 7.4085250 -4.0 7390
Charlotte Douglas Intl 7.3603189 -3.0 13674
Minneapolis St Paul Intl 7.2701689 -5.0 6929
William P Hobby 7.1761882 -4.0 2083
Bradley Intl 7.0485437 -10.0 412
San Antonio Intl 6.9453718 -9.0 659
South Bend Rgnl 6.5000000 -3.5 10
Louis Armstrong New Orleans Intl 6.4901750 -6.0 3715
Key West Intl 6.3529412 7.0 17
Eagle Co Rgnl 6.3043478 -4.0 207
Austin Bergstrom Intl 6.0199088 -5.0 2411
Chicago Ohare Intl 5.8766148 -8.0 16566
Orlando Intl 5.4546431 -5.0 13967
Detroit Metro Wayne Co 5.4299635 -7.0 9031
Portland Intl 5.1415797 -5.0 1342
Nantucket Mem 4.8522727 -3.0 264
Wilmington Intl 4.6355140 -7.0 107
Myrtle Beach Intl 4.6034483 -13.0 58
Albuquerque International Sunport 4.3818898 -5.5 254
George Bush Intercontinental 4.2407904 -5.0 7085
Norman Y Mineta San Jose Intl 3.4481707 -7.0 328
Southwest Florida Intl 3.2381496 -5.0 3502
San Diego Intl 3.1391657 -5.0 2709
Sarasota Bradenton Intl 3.0824313 -5.0 1201
Metropolitan Oakland Intl 3.0776699 -9.0 309
General Edward Lawrence Logan Intl 2.9143922 -9.0 15022
San Francisco Intl 2.6728915 -8.0 13173
Yampa Valley 2.1428571 2.0 14
Phoenix Sky Harbor Intl 2.0970473 -6.0 4606
Montrose Regional Airport 1.7857143 -10.5 14
Los Angeles Intl 0.5471109 -7.0 16026
Dallas Fort Worth Intl 0.3221268 -9.0 8388
Miami Intl 0.2990598 -9.0 11593
Mc Carran Intl 0.2577285 -8.0 5952
Salt Lake City Intl 0.1762546 -8.0 2451
Long Beach -0.0620272 -10.0 661
Martha\\'s Vineyard -0.2857143 -11.0 210
Seattle Tacoma Intl -1.0990991 -11.0 3885
Honolulu Intl -1.3651926 -7.0 701
John Wayne Arpt Orange Co -7.8682266 -11.0 812
Palm Springs Intl -12.7222222 -13.5 18
# Render the departure delay summary as a styled HTML table with centred
# columns and shortened column headers
kableExtra::kable_styling(
  kable(
    departure_delay_summary,
    format = "html",
    align = "c",
    col.names = c(
      "Airport Name",
      "Mean Delay (min)",
      "Median Delay (min)",
      "Flights"
    ),
    caption = "Departure Delay Summary"
  ),
  bootstrap_options = c("striped", "hover", "condensed")
)
Departure Delay Summary
Airport Name Mean Delay (min) Median Delay (min) Flights
Newark Liberty Intl 15.10795 -1 117596
John F Kennedy Intl 12.11216 -1 109416
La Guardia 10.34688 -3 101509

b. Flights with the fastest average speed

# Convert datasets to data.table
flights_dt <- as.data.table(flights)
planes_dt <- as.data.table(planes)

# Find the aircraft model with the highest average speed (mph) and the number
# of flights that average is based on.
#
# Speed is distance / (air_time / 60), computed only for flights with a
# positive, non-missing air_time and a known tail number. The join to planes
# uses nomatch = NULL (inner join): without it, every plane that has no such
# flight would contribute a row with a missing speed, and .N would count
# that row as a flight of its model.
fastest_aircraft <- flights_dt[
  !is.na(air_time) & air_time > 0 & !is.na(tailnum),
  .(tailnum, speed_mph = distance / (air_time / 60))
][
  planes_dt,
  .(model, speed_mph),
  on = .(tailnum),
  nomatch = NULL
][
  ,
  .(
    avg_speed = mean(speed_mph, na.rm = TRUE),  # Average speed per model
    num_flights = .N                            # Flights behind the average
  ),
  by = model
][
  order(-avg_speed)  # Fastest model first
][
  1  # Keep only the top row
]

# Render the fastest-aircraft row as a styled HTML table with centred columns
kableExtra::kable_styling(
  kable(
    fastest_aircraft,
    format = "html",
    align = "c",
    col.names = c("Model", "Average Speed (mph)", "Flights"),
    caption = "Fastest aircraft details"
  ),
  bootstrap_options = c("striped", "hover", "condensed")
)
Fastest aircraft details
Model Average Speed (mph) Flights
777-222 482.6254 4